# Core numerics and tabular data handling.
import numpy as np
import pandas as pd
# sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons, make_circles, make_classification
# Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Visualisation libraries
## matplotlib
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
# 'paper' context with slightly enlarged fonts for titles/labels.
sns.set_context('paper', rc={'font.size':12,'axes.titlesize':14,'axes.labelsize':12})
sns.set_style('white')
# NOTE(review): the 'seaborn-whitegrid' style was renamed
# 'seaborn-v0_8-whitegrid' in Matplotlib >= 3.6 -- confirm the installed
# Matplotlib version still accepts this name.
plt.style.use('seaborn-whitegrid')
# Suppress library warnings so notebook output stays clean.
import warnings
warnings.filterwarnings("ignore")
Machine learning can be categorized as supervised, semi-supervised, or unsupervised learning methods. Deep learning can be supervised or unsupervised, and it is based on artificial neural networks (ANN) with representation learning. In this article, we demonstrate implementing a deep neural network (DNN) in Keras.
Deep Neural Networks (DNNs), of which convolutional networks are a well-known example, are composed of several layers of nonlinear operations. The goal of deep learning methods is to learn feature hierarchies: higher-level features are formed by combining lower-level features [1].
A random n-class classification dataset can be generated using sklearn.datasets.make_classification. Here, we generate a dataset with two features and 6000 instances. Moreover, the dataset is generated for multiclass classification with four classes.
# Synthetic 4-class dataset: 6000 samples, 2 informative features, no
# redundant features, one cluster per class; seeded for reproducibility.
X, y = make_classification(n_samples = int(6e3), n_features=2, n_redundant=0, n_classes = 4,
                           n_informative=2, random_state=1, n_clusters_per_class=1)
# Class identifiers as strings, for use in plot legends and tick labels.
Labels = [str(label) for label in np.unique(y)]
One of the efficient methods of splitting a dataset into random train and test subsets is using sklearn.model_selection.train_test_split.
# Hold out 30% of the samples for validation; fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# One-hot encode the integer labels for the softmax / categorical-crossentropy head.
n_classes = len(Labels)
y_train = keras.utils.to_categorical(y_train, num_classes=n_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=n_classes)
# One-row overview of the shape of every split (sets become columns after .T).
shape_summary = pd.DataFrame(
    {'Set': ['X_train', 'X_test', 'y_train', 'y_test'],
     'Shape': [X_train.shape, X_test.shape, y_train.shape, y_test.shape]}
).set_index('Set').T
shape_summary
| Set | X_train | X_test | y_train | y_test |
|---|---|---|---|---|
| Shape | (4200, 2) | (1800, 2) | (4200, 4) | (1800, 4) |

A multi-layer perceptron (MLP) is a class of feedforward artificial neural network (ANN). scikit-learn.org has a well-written article regarding MLP and interested readers are encouraged to see this article.
Moreover, in this article, we present a multi-class MLP using Keras and focus on implementing this method in Keras. We define our model by using the Sequential class. Moreover, we consider the rectified linear unit (ReLU) as the activation function. An activation function allows for complex relationships in the data to be learned. For the last layer, we use the softmax function, also known as softargmax or the normalized exponential function.
# MLP: two 64-unit ReLU hidden layers, each followed by 50% dropout, and a
# softmax output with one unit per class.
model = keras.Sequential(
    [
        layers.Dense(64, input_dim=X.shape[1], activation='relu', name='Layer1'),
        layers.Dropout(0.5),
        layers.Dense(64, activation='relu', name='Layer2'),
        layers.Dropout(0.5),
        layers.Dense(len(Labels), activation='softmax', name='Layer3'),
    ],
    name='Multi_Class_MLP',
)
model.summary()
# Left-to-right architecture diagram with layer shapes and names.
tf.keras.utils.plot_model(model, show_shapes=True, show_layer_names=True,
                          expand_nested=True, rankdir='LR')
Model: "Multi_Class_MLP" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= Layer1 (Dense) (None, 64) 192 _________________________________________________________________ dropout (Dropout) (None, 64) 0 _________________________________________________________________ Layer2 (Dense) (None, 64) 4160 _________________________________________________________________ dropout_1 (Dropout) (None, 64) 0 _________________________________________________________________ Layer3 (Dense) (None, 4) 260 ================================================================= Total params: 4,612 Trainable params: 4,612 Non-trainable params: 0 _________________________________________________________________
# Number of training iterations (epochs); +1 so epoch 1000 itself is run.
IT = int(1e3)+1
# Bug fix: the `lr` argument is a long-deprecated alias removed in recent
# Keras releases -- the canonical name is `learning_rate`.
# NOTE(review): `decay` is likewise deprecated in favour of learning-rate
# schedules in newer Keras; kept here to preserve behaviour -- confirm the
# installed Keras version still accepts it.
model.compile(optimizer= keras.optimizers.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True),
              loss='categorical_crossentropy',
              metrics=['accuracy','mae', 'mse'])
# Train silently (verbose=0); `history` keeps per-epoch train/val metrics.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs= IT, batch_size=128, verbose = 0)
def Search_List(Key, List):
    """Return every element of *List* whose text contains the substring *Key*."""
    return list(filter(lambda item: Key in item, List))
# Display names for the metric keys that Keras reports in `history`.
Metrics_Names = {'loss':'Loss', 'accuracy':'Accuracy', 'mae':'MAE', 'mse':'MSE'}
def Table_modify(df, Metrics_Names = Metrics_Names):
    """Rename metric columns to their display names, order columns
    alphabetically, and prepend an 'Iteration' (epoch index) column."""
    table = df.rename(columns=Metrics_Names)
    table = table.reindex(sorted(table.columns), axis=1)
    table.insert(loc=0, column='Iteration',
                 value=np.arange(table.shape[0]), allow_duplicates=False)
    return table
# Partition the Keras history keys into validation ('val_'-prefixed) and train.
val_keys = Search_List('val_', history.history.keys())
train_keys = list(set(history.history.keys()) - set(val_keys))
# One DataFrame per split, columns in the same order as the key lists.
Validation_Table = pd.DataFrame({key: history.history[key] for key in val_keys})
Train_Table = pd.DataFrame({key: history.history[key] for key in train_keys})
# Drop the 'val_' prefix so both tables share identical column names.
Validation_Table.columns = [col.replace('val_', '') for col in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Final evaluation of the trained model on both splits.
# Bug fix: the original labelled the X_test evaluation 'Train Set Score' and
# the X_train evaluation 'Validation Set Score' -- the labels were swapped.
# Train set score
train_score = model.evaluate(X_train, y_train, batch_size=128, verbose=0)
train_score = pd.DataFrame(train_score, index=model.metrics_names).T
train_score.index = ['Train Set Score']
# Validation set score (X_test is the validation_data passed to fit)
val_score = model.evaluate(X_test, y_test, batch_size=128, verbose=0)
val_score = pd.DataFrame(val_score, index=model.metrics_names).T
val_score.index = ['Validation Set Score']
# DataFrame.append was removed in pandas 2.0; use pd.concat instead.
score = pd.concat([train_score, val_score])
score.rename(columns= Metrics_Names, inplace = True)
score = score.reindex(sorted(score.columns), axis=1)
# Styler.set_precision is deprecated; Styler.format(precision=...) replaces it.
display(score.style.format(precision=4))
| | Accuracy | Loss | MAE | MSE |
|---|---|---|---|---|
| Train Set Score | 0.9044 | 0.2907 | 0.0707 | 0.0365 |
| Validation Set Score | 0.9060 | 0.2737 | 0.0697 | 0.0354 |
Let's define some functions by which we can analyze the performance of the model.
def Plot_history(history, Title = False, Table_Rows = 25):
    """Plot training curves (left) next to a table of sampled epochs (right).

    history    : DataFrame produced by Table_modify ('Iteration' + metric columns).
    Title      : optional figure title string (False disables the title).
    Table_Rows : approximate number of evenly spaced epochs shown in the table.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"}, {"type": "table"}]])
    # Left panel: one line per metric, all sharing the same [0, 1] y-axis.
    curve_specs = [('Loss', 'OrangeRed', 'Loss'),
                   ('Accuracy', 'MidnightBlue', 'Accuracy'),
                   ('MAE', 'ForestGreen', 'Mean Absolute Error (MAE)'),
                   ('MSE', 'purple', 'Mean Squared Error (MSE)')]
    for column, color, label in curve_specs:
        fig.add_trace(go.Scatter(x=history['Iteration'].values, y=history[column].values,
                                 line=dict(color=color, width=1.5), name=label), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor='white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, 1], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right panel: sample ~Table_Rows evenly spaced epochs plus the final one.
    keep = np.linspace(0, history.shape[0], Table_Rows, endpoint=False).round(0).astype(int)
    keep = np.append(keep, history.Iteration.values[-1])
    sampled = history[history.index.isin(keep)]
    cell_values = [sampled.loc[:, column].astype(float).round(4).values
                   for column in sampled.columns]
    fig.add_trace(go.Table(header=dict(values=list(sampled.columns), line_color='darkslategray',
                                       fill_color='DimGray', align=['center', 'center'],
                                       font=dict(color='white', size=12), height=25),
                           columnwidth=[0.4, 0.4, 0.4, 0.4],
                           cells=dict(values=cell_values, line_color='darkslategray',
                                      fill=dict(color=['WhiteSmoke', 'white']),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 2)
    if Title != False:
        fig.update_layout(plot_bgcolor='white',
                          title={'text': Title, 'x': 0.46, 'y': 0.94, 'xanchor': 'center', 'yanchor': 'top'},
                          yaxis_title='Frequency')
    fig.show()
def Confusion_Matrix(Model, X, y, Labels, FG = (14, 5)):
    """Plot the raw (left) and row-normalized (right) confusion matrices.

    Model  : fitted classifier exposing .predict (a Keras model here).
    X      : feature matrix to predict on.
    y      : true targets -- one-hot (2-D) or integer/binary (1-D) labels.
    Labels : class names used as tick labels on both heatmaps.
    FG     : figure size passed to plt.subplots.
    Returns (fig, ax) for further customization (e.g. a suptitle).
    """
    fig, ax = plt.subplots(1, 2, figsize=FG)
    y_pred = Model.predict(X)
    # Robustness fix: a 1-D target array has no second axis, so the original
    # `y.shape[1]` raised IndexError; guard on ndim first. Behaviour for the
    # original 2-D inputs is unchanged.
    if y.ndim > 1 and y.shape[1] > 1:
        # One-hot targets: argmax both truth and prediction back to class ids.
        CM = confusion_matrix(y.argmax(axis = 1), y_pred.argmax(axis = 1))
    else:
        # Binary/integer targets: round predicted probabilities to labels.
        CM = confusion_matrix(y, np.round(y_pred))
    # Left: absolute counts.
    _ = sns.heatmap(CM.round(2), annot=True, annot_kws={"size": 14}, cmap="Blues", ax = ax[0])
    _ = ax[0].set_xlabel('Predicted labels')
    _ = ax[0].set_ylabel('True labels')
    _ = ax[0].set_title('Confusion Matrix')
    _ = ax[0].xaxis.set_ticklabels(Labels)
    _ = ax[0].yaxis.set_ticklabels(Labels)
    # Right: each row divided by its total, so the diagonal is per-class recall.
    CM = CM.astype('float') / CM.sum(axis=1)[:, np.newaxis]
    _ = sns.heatmap(CM.round(2), annot=True, annot_kws={"size": 14}, cmap="Greens", ax = ax[1],
                    linewidths = 0.2, vmin=0, vmax=1, cbar_kws={"shrink": 1})
    _ = ax[1].set_xlabel('Predicted labels')
    _ = ax[1].set_ylabel('True labels')
    _ = ax[1].set_title('Normalized Confusion Matrix')
    _ = ax[1].xaxis.set_ticklabels(Labels)
    _ = ax[1].yaxis.set_ticklabels(Labels)
    return fig, ax
def Plot_Classification(Model, X, y, Labels, BP = .5, Alpha=0.6, ax = False, fs = 7, ColorMap = 'Spectral'):
    """Draw the model's decision regions with the data points overlaid.

    Model    : fitted classifier exposing .predict.
    X        : (n, 2) feature matrix (only two features are plotted).
    y        : true targets -- one-hot (2-D) or integer/binary labels.
    Labels   : class names for the legend.
    BP       : legend border padding.
    Alpha    : scatter-point opacity.
    ax       : existing Axes to draw on; False creates a new square figure.
    fs       : figure size (inches) when a new figure is created.
    ColorMap : colormap shared by the regions and the points.
    """
    h=0.02    # mesh resolution for the decision-region grid
    pad=0.25  # margin added around the data range
    # adding margins
    x_min, x_max = X[:, 0].min()-pad, X[:, 0].max()+pad
    y_min, y_max = X[:, 1].min()-pad, X[:, 1].max()+pad
    # Generating meshgrids
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict a class (or probability) for every grid point.
    Pred = Model.predict(np.c_[xx.ravel(), yy.ravel()])
    # Robustness fix: guard on ndim before `y.shape[1]` so 1-D targets no
    # longer raise IndexError; 2-D behaviour is unchanged.
    if y.ndim > 1 and y.shape[1] > 1:
        Pred = Pred.argmax(axis = 1)
    Pred = Pred.reshape(xx.shape)
    # Bug fix: an Axes object should not be compared with `== False`; test
    # identity explicitly. NOTE: the created `fig` is intentionally implicit
    # (pyplot keeps it current), matching the original behaviour.
    if ax is False:
        fig, ax = plt.subplots(1, 1, figsize=(fs, fs))
    _ = ax.contourf(xx, yy, Pred, cmap = ColorMap, alpha=0.2)
    if y.ndim > 1 and y.shape[1] > 1:
        y = y.argmax(axis = 1)
    else:
        # Bug fix: `.T[0]` only worked for (n, 1) targets; ravel() is identical
        # there and also handles genuinely 1-D targets.
        y = np.round(y).ravel()
    scatter = ax.scatter(X[:,0], X[:,1], s=70, c=y, edgecolor = 'Navy', alpha = Alpha, cmap = ColorMap)
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= Labels,
                  fancybox=True, framealpha=1, shadow=True, borderpad=BP, loc='best', fontsize = 14)
    _ = ax.set_xlim(x_min, x_max)
    _ = ax.set_ylim(y_min, y_max)
    _ = ax.set_xlabel(r'$X_1$')
    _ = ax.set_ylabel(r'$X_2$')
# Learning-curve figure (curves + metric table) for each split.
for table, split_name in ((Train_Table, 'Train Set'), (Validation_Table, 'Validation Set')):
    Plot_history(table, Title=split_name)
The confusion matrix allows for visualization of the performance of an algorithm.
# Confusion matrices for each split, titled via the returned figure.
fig, _ = Confusion_Matrix(model, X_train, y_train, Labels)
_ = fig.suptitle('Train Set', fontsize = 16)
fig, _ = Confusion_Matrix(model, X_test, y_test, Labels)
_ = fig.suptitle('Test Set', fontsize = 16)
# Decision regions side by side: one axis per split, frames hidden.
fig, ax = plt.subplots(1, 2, figsize=(16, 7))
splits = ((X_train, y_train, 'Train Set'), (X_test, y_test, 'Test Set'))
for axis, (features, targets, split_name) in zip(ax, splits):
    Plot_Classification(model, features, targets, Labels, BP = .2, ax = axis)
    _ = axis.set_title(split_name, fontsize = 16)
    _ = axis.axis('off')